For the various SEPP algorithms, we end up manipulating an upper-triangular matrix.
In [1]:
import numpy as np
def back(t):
    """Constant background intensity (1.5) broadcast to the shape of `t`."""
    arr = np.asarray(t)
    return np.ones_like(arr) * 1.5
def trig(dt):
    """Exponentially decaying triggering kernel: 0.7 * exp(-0.2 * dt)."""
    return np.exp(np.asanyarray(dt) * -0.2) * 0.7
def rand(n):
    """Return `n` sorted uniform random points in [0, 1).

    Raises AssertionError in the (vanishingly unlikely) event of a
    duplicate, since the algorithms under test assume distinct times.
    """
    t = np.random.random(n)
    t.sort()
    # Vectorised duplicate check: after sorting, any duplicate is adjacent.
    if np.any(t[1:] == t[:-1]):
        raise AssertionError()
    return t
In [2]:
def slow(times):
    """Naive reference: build the column-normalised triggering matrix.

    Entry [j, i] (j < i) holds trig(times[i] - times[j]); the diagonal
    holds the background rate; each column is normalised to sum to 1.
    """
    n = len(times)
    p = np.zeros((n, n))
    for col, t in enumerate(times):
        p[col, col] = back(t)
        for row, s in enumerate(times[:col]):
            p[row, col] = trig(t - s)
    # Normalise column by column, exactly as the probabilities require.
    for col in range(n):
        p[:, col] /= np.sum(p[:, col])
    return p

# Sanity checks on the two smallest cases worked out by hand.
np.testing.assert_allclose(slow([1]), [[1]])
b = 1.5
a = trig(1)
np.testing.assert_allclose(slow([1, 2]), [[1, a/(a+b)], [0, b/(a+b)]])
In [3]:
def f1(times):
    """Column-vectorised variant of `slow`: fill each column in one shot."""
    times = np.asarray(times)
    n = len(times)
    p = np.zeros((n, n))
    for col, t in enumerate(times):
        p[col, col] = back(t)
        p[:col, col] = trig(t - times[:col])
    return p / np.sum(p, axis=0)[None, :]

# Cross-check f1 against the reference on many random inputs.
for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f1(times))
In [4]:
def f2(times):
    """Like `f1` but start from np.empty and zero the lower triangle by hand."""
    times = np.asarray(times)
    d = times.shape[0]
    p = np.empty((d, d))
    for col, t in enumerate(times):
        p[col, col] = back(t)
        p[:col, col] = trig(t - times[:col])
        p[col + 1:, col] = 0
    return p / p.sum(axis=0)[None, :]

# Cross-check f2 against the reference on many random inputs.
for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f2(times))
In [5]:
def f3(times):
    """More space efficient: the normalised upper triangle in packed form.

    Column i occupies i+1 consecutive entries (its off-diagonal part
    followed by its diagonal entry) of a flat array of length d*(d+1)/2.
    """
    times = np.asarray(times)
    d = times.shape[0]
    packed = np.empty(d * (d + 1) // 2)
    start = 0
    for i, t in enumerate(times):
        end = start + i + 1
        packed[end - 1] = back(t)
        packed[start:end - 1] = trig(t - times[:i])
        packed[start:end] /= np.sum(packed[start:end])
        start = end
    return packed
def to_compressed(p):
    """Pack the upper triangle of a square matrix `p`, column by column."""
    d = p.shape[0]
    out = np.empty(d * (d + 1) // 2)
    pos = 0
    for col in range(d):
        nxt = pos + col + 1
        out[pos:nxt] = p[:col + 1, col]
        pos = nxt
    return out
# Cross-check f3's packed output against the packed reference matrix.
for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(to_compressed(slow(times)), f3(times))
In [6]:
def f4(times):
    """Return the normalised matrix as (diagonal, packed strict upper triangle).

    The strict upper triangle is stored flat, column by column, in an
    array of length d*(d-1)/2.
    """
    times = np.asarray(times)
    d = times.shape[0]
    diag = back(times)
    upper = np.empty(d * (d - 1) // 2)
    start = 0
    for i, t in enumerate(times):
        stop = start + i
        upper[start:stop] = trig(t - times[:i])
        total = diag[i] + np.sum(upper[start:stop])
        diag[i] /= total
        upper[start:stop] /= total
        start = stop
    return diag, upper
def split(p):
    """Split a square matrix into (diagonal, packed strict upper triangle)."""
    d = p.shape[0]
    diag = np.empty(d)
    diag[:] = p[np.diag_indices(d)]
    offdiag = np.empty(d * (d - 1) // 2)
    pos = 0
    for col in range(1, d):
        offdiag[pos:pos + col] = p[:col, col]
        pos += col
    return diag, offdiag
# Cross-check f4 against split(slow(...)); hoist the repeated pure calls.
for n in range(1, 10):
    for _ in range(10):
        times = rand(n)
        expected = split(slow(times))
        actual = f4(times)
        np.testing.assert_allclose(expected[0], actual[0])
        np.testing.assert_allclose(expected[1], actual[1])
In [7]:
def f5(times):
    """Set the whole diagonal at once, then fill each column's upper part."""
    times = np.asarray(times)
    d = times.shape[0]
    p = np.zeros((d, d))
    p[np.diag_indices(d)] = back(times)
    for col, t in enumerate(times):
        p[:col, col] = trig(t - times[:col])
    return p / p.sum(axis=0)[None, :]

# Cross-check f5 against the reference on many random inputs.
for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f5(times))
In [8]:
def f6(times):
    # Fully vectorised variant: compute all pairwise gaps at once and use a
    # masked array to keep the lower triangle out of the calculation.
    times = np.asarray(times)
    d = times.shape[0]
    # dt[j, i] = times[i] - times[j]; strictly positive above the diagonal.
    dt = times[None,:] - times[:,None]
    # Mask non-positive gaps (diagonal and lower triangle); the mask
    # propagates through the ufuncs inside trig().
    dt = np.ma.array(dt, mask=dt<=0)
    p = trig(dt)
    # Assigning through fancy indexing writes real values onto the diagonal,
    # clearing its mask.
    p[np.diag_indices(d)] = back(times)
    # Masked (lower-triangle) cells do not contribute to the column sums.
    p /= np.sum(p, axis=0)[None,:]
    return p
# Cross-check f6 against the reference on many random inputs.
for n in range(1, 10):
    for _ in range(100):
        times = rand(n)
        np.testing.assert_allclose(slow(times), f6(times))
In [9]:
# Micro-benchmark the six constructions as the problem size grows 10x per cell.
times = rand(10)
%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))
In [10]:
# Same benchmark at n = 100.
times = rand(100)
%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))
In [11]:
# Same benchmark at n = 1000.
times = rand(1000)
%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))
In [12]:
# Same benchmark at n = 10000.
times = rand(10000)
%timeit(f1(times))
%timeit(f2(times))
%timeit(f3(times))
%timeit(f4(times))
%timeit(f5(times))
%timeit(f6(times))
Conclusions:
f5 wins (i.e. construct the matrix as a full matrix, using numpy as much as possible). f4 is the quicker of the compressed-storage variants.
In [23]:
def sslow(times):
    """Naive summary statistics of the matrix from `slow`.

    Returns (diagonal sum, off-diagonal sum, off-diagonal sum weighted
    by the time gap between the two events).
    """
    p = slow(times)
    count = len(times)
    diag_sum = sum(p[i, i] for i in range(count))
    off_diag_sum = 0
    weighted_sum = 0
    for col in range(count):
        for row in range(col):
            off_diag_sum += p[row, col]
            weighted_sum += p[row, col] * (times[col] - times[row])
    return diag_sum, off_diag_sum, weighted_sum

# Hand-computed sanity checks.
assert sslow([1]) == (1, 0, 0)
expected = np.asarray([1.72355007, 0.27644993, 0.27644993])
assert np.all(np.abs(np.asarray(sslow([1, 2])) - expected) < 1e-8)
In [38]:
def s0(times):
    """Summary statistics computed from the full matrix built by `f5`."""
    p = f5(times)
    diag_sum = np.sum(p[np.diag_indices(p.shape[0])])
    off_diag_sum = 0
    weighted_sum = 0
    for col, t in enumerate(times):
        column = p[:col, col]
        off_diag_sum += np.sum(column)
        weighted_sum += np.sum(column * (t - times[:col]))
    return diag_sum, off_diag_sum, weighted_sum

# s0 must agree with the naive sslow to tight tolerance.
for n in range(1, 100):
    times = rand(n)
    reference = np.asarray(sslow(times))
    result = np.asarray(s0(times))
    assert np.all(np.abs(reference - result) < 1e-10)
In [39]:
def s1(times):
    """Summary statistics with the `f5` matrix construction inlined,
    so the whole computation can be timed as a single unit."""
    times = np.asarray(times)
    d = times.shape[0]
    p = np.zeros((d, d))
    p[np.diag_indices(d)] = back(times)
    for col, t in enumerate(times):
        p[:col, col] = trig(t - times[:col])
    p /= p.sum(axis=0)[None, :]
    diag_sum = np.sum(p[np.diag_indices(d)])
    # Everything not on the diagonal is off-diagonal mass; columns sum to 1.
    off_diag_sum = np.sum(p) - diag_sum
    weighted_sum = sum(np.sum(p[:col, col] * (t - times[:col]))
                       for col, t in enumerate(times))
    return diag_sum, off_diag_sum, weighted_sum

# s1 must agree with the naive sslow to tight tolerance.
for n in range(1, 100):
    times = rand(n)
    reference = np.asarray(sslow(times))
    result = np.asarray(s1(times))
    assert np.all(np.abs(reference - result) < 1e-10)
In [40]:
def s2(times):
    """Optimised s1: accumulate the three sums column by column without
    ever materialising the full (d, d) matrix.

    Returns (diag_sum, off_diag_sum, weighted_sum), the same triple as
    `sslow`.
    """
    times = np.asarray(times)
    # (The original also computed `d = times.shape[0]` but never used it.)
    diag = back(times)
    off_diag_sum, weighted_sum = 0, 0
    for i, t in enumerate(times):
        dt = t - times[:i]          # gaps to all earlier events
        off_diag = trig(dt)
        odt = np.sum(off_diag)
        total = odt + diag[i]       # this column's normaliser
        diag[i] /= total
        off_diag_sum += odt / total
        weighted_sum += np.sum(off_diag * dt) / total
    return np.sum(diag), off_diag_sum, weighted_sum
# s2 must agree with the naive sslow to tight tolerance.
for n in range(1, 100):
    times = rand(n)
    exp = np.asarray(sslow(times))
    got = np.asarray(s2(times))
    assert np.all(np.abs(exp - got) < 1e-10)
In [41]:
# Micro-benchmark the three summary-statistic implementations, n = 10.
times = rand(10)
%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))
In [42]:
# Same benchmark at n = 100.
times = rand(100)
%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))
In [43]:
# Same benchmark at n = 1000.
times = rand(1000)
%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))
In [44]:
# Same benchmark at n = 10000.
times = rand(10000)
%timeit(s0(times))
%timeit(s1(times))
%timeit(s2(times))
Conclusion: Writing hand-tuned code is quicker, but not that much quicker, than doing the naive thing.
In [ ]: